library(data.table)
library(lfe)
library(lubridate)
library(ggplot2)
library(did2s)
library(stargazer)
rm(list=ls())
setwd("")


fData <- fread("Covid_panel_george_new.csv")

#create binary variables for party id
fData <- fData[,Party_orientation := ifelse(Party_orientation%in%1:3,Party_orientation,NA)]
fData <- fData[,Democrat_bin := as.numeric(Party_orientation==1)]
fData <- fData[,Republican_bin := as.numeric(Party_orientation==2)]
fData <- fData[,Independent_bin := as.numeric(Party_orientation==3)]

#clean date
fData <- fData[,Date_corr := ymd(date_yyyymmdd)]

#dummy for protest counties
fData <- fData[,Protest_bin_corr := as.numeric(tot_protests>=1)]

#get week
fData <- fData[,Week_corr := round(difftime(Date_corr,dmy("01-01-2020"),units="weeks"))]

#get dummy for post GF
fData <- fData[,After_protest := as.numeric(Date_corr>=dmy("26-05-2020"))]

#get treatment dummy
fData <- fData[,Protest_treatment := ifelse(Protest_bin_corr==1&Date_corr>=dmy("26-05-2020"),1,0)] #create binary protest var

#Create protest data: only until 31-07-2020, and remove protest period.
fData_protests <- fData[year_only==2020]
fData_protests <- fData_protests[Date_corr<dmy("31-07-2020")] #set end date 
fData_protests <- fData_protests[!Date_corr%in%dmy("26-05-2020"):dmy("07-06-2020")] #remove protest window

#Create variable 'weeks to treatment'
fData_protests <- fData_protests[order(Date_corr)]
fData_protests <- fData_protests[,Weeks_to_treatment := round(difftime(Date_corr,dmy("26-05-2020"),units="weeks"))]
fData_protests <- fData_protests[,Weeks_to_treatment := ifelse(Protest_bin_corr==0,Inf,Weeks_to_treatment)]

#remove data with NAs
fData_protests <- fData_protests[Protest_treatment%in%c(0,1)]
fData_protests <- fData_protests[!is.na(Date_corr)]

########### Table A1: Summary statistics table ############

fSumStat <- fData_protests[,c("Democrat_bin","Protest_bin_corr")]
fSumStat_protest <- fData_protests[Protest_bin_corr>0,c("Democrat_bin")]

lSumStat <- lapply(fSumStat, function(x){
  temp <- data.frame(Mean = mean(x,na.rm=T),Min = min(x,na.rm=T),Max= max(x,na.rm=T))
  return(temp)
})

lSumStat_protest <- lapply(fSumStat_protest, function(x){
  temp <- data.frame(Mean = mean(x,na.rm=T),Min = min(x,na.rm=T), Max= max(x,na.rm=T))
  return(temp)
})


fSumStat <- do.call(rbind.data.frame,lSumStat)
fSumStat_protest <- do.call(rbind.data.frame,lSumStat_protest)
fSumStat_both <- rbind.data.frame(fSumStat,fSumStat_protest)

colnames(fSumStat_both) <- c("Mean","Min","Max")
rownames(fSumStat_both) <- c("Democrat","Protest county","Democrat, protest county")

fSumStat_tex <- stargazer(fSumStat_both,
                          summary=F,
                          omit.summary.stat = "N",
                          digits=2,
                          median=T,
                          font.size = "scriptsize", 
                          label = "Summary_stats_gallup",
                          title = "Summary Statistics Gallup Covid Panel",
                          # covariate.labels = c("Change in Democratic vote share",
                          #                      "Protests (Yes/No)",
                          #                      "Number of protests",
                          #                      "Rainfall",
                          #                      "Number of protesters (1000s)",
                          #                      "Number of protesters (1000s, high estimate)"),
                          table.placement = "H")




########### Figure 1: Dynamic effects, covid panel ############
reg_dyn_gard_1 <- did2s(fData_protests[!is.na(Democrat_bin)&!is.na(Weeks_to_treatment)],
                        yname = "Democrat_bin", 
                        first_stage = ~ 0| fips + Week_corr, 
                        second_stage = ~i(Weeks_to_treatment, ref=c(0, Inf)), 
                        treatment = "Protest_treatment", 
                        cluster_var = "fips")

pdf("Graphs/Dynamic_did_covid_panel.pdf")
fixest::iplot(reg_dyn_gard_1, main = "Identify as a Democrat", xlab = "Relative time to treatment", col = "steelblue", ref.line = 0.5)
dev.off()



########### Figure A7: Event study graph for Diff-in-diff with county-level voting data from 2000 - 2020 ###############
fData_county <- fread("Final_data_counties_stata_new.csv")

#only select relevant vars
fData_county <- fData_county[,c("FIPS_code_5char","Frac_dem_2020", "Frac_dem_2016","Frac_dem_2012","Frac_dem_2008","Frac_dem_2004","Frac_dem_2000","Tot_protests","Days_of_protests",
                                "Estimate_low_tot_pop_100","PRCP_total")]

#reshape to long format
fData_county_long_dem <- reshape2::melt(fData_county,id.vars=c("FIPS_code_5char","PRCP_total","Tot_protests","Days_of_protests","Estimate_low_tot_pop_100"),measure.vars=c("Frac_dem_2020","Frac_dem_2016","Frac_dem_2012","Frac_dem_2008","Frac_dem_2004","Frac_dem_2000")) 

colnames(fData_county_long_dem)[colnames(fData_county_long_dem)=="variable"] <- "Year"
colnames(fData_county_long_dem)[colnames(fData_county_long_dem)=="value"] <- "Dem_vote_share"
fData_county_long_dem <- as.data.table(fData_county_long_dem)[,Year := as.numeric(gsub("Frac_dem_","",Year))]
fData_county_long_dem <-fData_county_long_dem[order(FIPS_code_5char,Year)]

#create treatment dummies
fData_county_long_dem <- fData_county_long_dem[,Post_treatment := as.numeric(Year>=2020)]
fData_county_long_dem <- fData_county_long_dem[,Treated_county := as.numeric(Tot_protests >0)]
fData_county_long_dem <- fData_county_long_dem[,Treatment := as.numeric(Post_treatment*Treated_county)]

#Get election-to-treatment counter
fData_county_long_dem <- fData_county_long_dem[,Elections_to_treament := ((Year-2020)/4)]
fData_county_long_dem <- fData_county_long_dem[,Elections_to_treatment_did2s := ifelse(Treated_county==1,Elections_to_treament,Inf)]

fData_county_long_dem <- fData_county_long_dem[,First_treatment := ifelse(Treated_county==1,2020,NA)]
fData_county_long_dem <- fData_county_long_dem[,First_treatment_election := ifelse(Treated_county==1,0,NA)]

#Dynamic treatment effect
did_analysis_elections_dynamic <- did2s(fData_county_long_dem, 
                                        yname = "Dem_vote_share", 
                                        first_stage = ~1| FIPS_code_5char + Year, 
                                        second_stage = ~i(Elections_to_treatment_did2s, ref=c(-1, Inf)), 
                                        treatment = "Treatment", 
                                        cluster_var = "FIPS_code_5char")

pdf(paste0("Graphs/Election_did_eventstudy.pdf"))
fixest::iplot(did_analysis_elections_dynamic, main = "", xlab = "Elections to treatment",ylab="Coefficient", col = "steelblue", ref.line = -0.5)
dev.off()
gc()



########### Figure 5:  Diff-in-diff with county-level voting data from 2000 - 2020 ###############

#Export table with main results
did_analysis_elections_static <- did2s(fData_county_long_dem, 
                                       yname = "Dem_vote_share", 
                                       first_stage = ~1| FIPS_code_5char + Year, 
                                       second_stage = ~i(Treatment, ref=F), 
                                       treatment = "Treatment", 
                                       cluster_var = "FIPS_code_5char")


proxy_reg <- felm(Dem_vote_share~0+Treatment, data=fData_county_long_dem) #use this to make tables with stargazer


lList_did_static <- list(did_analysis_elections_static)

reg_table_covs_spilltreat_static <- stargazer(proxy_reg,
                                              coef = unlist(lapply(lList_did_static, function(x) summary(x)$coefficients)),
                                              se = unlist(lapply(lList_did_static, function(x) summary(x)$coeftable[,"Std. Error"])),
                                              covariate.labels = "Protests (yes/no)",
                                              # column.labels = c("Catholic","Catholic father","Catholic mother","Church attendance"),
                                              model.numbers = F,
                                              dep.var.caption = "",
                                              dep.var.labels = "",
                                              title = "Effect of BLM protests on voting, difference-in-differences",
                                              label = "did_county_voting",
                                              font.size = "scriptsize",
                                              omit.stat = c("all"),
                                              add.lines = list(c("County fixed effects",rep("Yes",4)),
                                                               c("Election fixed effects",rep("Yes",4)),
                                                               c("Observations",prettyNum(unlist(lapply(lList_did_static, function(x) x$nobs)),big.mark = ",")),
                                                               c("Adjusted R-squared",round(unlist(lapply(lList_did_static, function(x) summary(x)$sq.cor)),3))
                                                               
                                                               
                                              )
)
